/**
 * Package name:indi.yangshenhui.crawlers
 * File name:Basic.java
 * Date:2016年7月22日-下午4:31:48
 * feiniu.com Inc.Copyright (c) 2013-2015 All Rights Reserved.
 *
 */
package indi.yangshenhui.crawlers;

import java.util.List;

import com.alibaba.fastjson.JSON;

import cn.wanghaomiao.seimi.annotation.Crawler;
import cn.wanghaomiao.seimi.def.BaseSeimiCrawler;
import cn.wanghaomiao.seimi.struct.Request;
import cn.wanghaomiao.seimi.struct.Response;
import cn.wanghaomiao.xpath.model.JXDocument;

/**
 * Minimal crawler example built on {@link BaseSeimiCrawler}.
 * <p>
 * Starts from the cnblogs.com front page, selects the {@code href} of every
 * {@code <a class="titlelnk">} element and pushes one follow-up
 * {@link Request} per link; {@link #getTitle(Response)} then prints the URL
 * and the selected title text of each fetched page to {@code System.err}.
 * <p>
 * Created 2016-07-22 16:31:48.
 *
 * @author shenhui.yang
 * @version 1.0.0
 */
// The @Crawler annotation is intentionally left commented out in this file.
//@Crawler(name = "basic")
// NOTE(review): presumably silences the unused-import warning for Crawler
// while the annotation above stays commented out -- confirm before removing.
@SuppressWarnings("unused")
public class Basic extends BaseSeimiCrawler {
	/** Page the crawl starts from. */
	private static final String START_URL = "http://www.cnblogs.com/";

	/** XPath selecting the href attribute of every post link on the start page. */
	private static final String POST_LINK_XPATH = "//a[@class='titlelnk']/@href";

	/** XPath selecting the title text of a post page (two alternative markups). */
	private static final String POST_TITLE_XPATH =
			"//h1[@class='postTitle']/a/text()|//a[@id='cb_post_title_url']/text()";

	/** Name of the method in this class handed to each follow-up {@link Request}. */
	private static final String TITLE_CALLBACK = "getTitle";

	/**
	 * Returns the URLs the crawl begins with.
	 *
	 * @return a single-element array containing the cnblogs.com front page URL
	 */
	@Override
	public String[] startUrls() {
		return new String[] { START_URL };
	}

	/**
	 * Handles the response for a start URL: prints the selected post links as
	 * JSON to {@code System.err} and pushes one {@link Request} per usable link,
	 * naming {@code getTitle} as the second {@link Request} argument.
	 * <p>
	 * Any exception raised while selecting or pushing is printed via
	 * {@link Exception#printStackTrace()} and not rethrown.
	 *
	 * @param response the fetched start page
	 */
	@Override
	public void start(Response response) {
		JXDocument doc = response.document();
		// NOTE(review): guard against document() returning null (believed to
		// happen for non-text bodies -- confirm for the SeimiCrawler version in
		// use) instead of relying on a caught NullPointerException.
		if (doc == null) {
			System.err.println("No parsable document for " + response.getUrl());
			return;
		}
		try {
			List<Object> urls = doc.sel(POST_LINK_XPATH);
			System.err.println(JSON.toJSONString(urls));
			if (urls == null) {
				return;
			}
			for (Object s : urls) {
				// Skip unusable entries so one bad link cannot abort the rest.
				if (s == null) {
					continue;
				}
				String href = s.toString();
				if (href == null || href.trim().isEmpty()) {
					continue;
				}
				push(new Request(href, TITLE_CALLBACK));
			}
		} catch (Exception e) {
			// NOTE(review): output deliberately goes to stderr here; earlier
			// commented-out code used a `logger` (presumably inherited from
			// BaseSeimiCrawler) -- consider switching back once confirmed.
			e.printStackTrace();
		}
	}

	/**
	 * Handles the response for a post page: prints the response URL, a tab, and
	 * the nodes selected by the title XPath to {@code System.err}.
	 * <p>
	 * Any exception raised while selecting is printed via
	 * {@link Exception#printStackTrace()} and not rethrown.
	 *
	 * @param response the fetched post page
	 */
	public void getTitle(Response response) {
		JXDocument doc = response.document();
		// Same null guard as in start(): report and return rather than NPE.
		if (doc == null) {
			System.err.println("No parsable document for " + response.getUrl());
			return;
		}
		try {
			System.err.println(response.getUrl() + "\t" + doc.sel(POST_TITLE_XPATH));
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}
